Data Visualisation and R

Introduction

  • trained as a freshwater ecologist
  • using R and Python for almost 20 years
  • Data Analyst for the last 10 years
  • worked for Monash University and EPA

Department of Jobs, Skills, Industry and Regions

  • data analysis
  • economic analysis
  • data-driven web content

Why R

Why R for Data Visualisation

R is not just for statistics

  • Active Community
  • Extensive capabilities
  • Reporting and web frameworks

Why R for Data Visualisation

R is not just for statistics

  • publication frameworks
    • rmarkdown
    • quarto
    • shiny
  • charting libraries

Examples

Creative Industries

Creative Industries

Victorian tourism

Victorian tourism

Victorian tourism

R for Data Visualisation

Charting libraries

R’s htmlwidgets framework

Charting library examples

ggplot2

highcharter

from line charts to maps

Data

Victorian unemployment data

# A tibble: 1,104 × 6
   table_no series                        series_id series_type date       value
   <chr>    <chr>                         <chr>     <chr>       <date>     <dbl>
 1 6202005  Unemployment rate ;  Persons… A8442335… Seasonally… 1978-02-01  5.96
 2 6202005  Unemployment rate ;  Persons… A8442369… Original    1978-02-01  6.58
 3 6202005  Unemployment rate ;  Persons… A8442335… Seasonally… 1978-03-01  5.72
 4 6202005  Unemployment rate ;  Persons… A8442369… Original    1978-03-01  5.94
 5 6202005  Unemployment rate ;  Persons… A8442335… Seasonally… 1978-04-01  5.49
 6 6202005  Unemployment rate ;  Persons… A8442369… Original    1978-04-01  5.50
 7 6202005  Unemployment rate ;  Persons… A8442335… Seasonally… 1978-05-01  5.64
 8 6202005  Unemployment rate ;  Persons… A8442369… Original    1978-05-01  5.71
 9 6202005  Unemployment rate ;  Persons… A8442335… Seasonally… 1978-06-01  5.53
10 6202005  Unemployment rate ;  Persons… A8442369… Original    1978-06-01  5.41
# ℹ 1,094 more rows

Data from ABS Labour Force

Victorian regional unemployment data

# A tibble: 10 × 6
   table_no series                        series_id series_type date       value
   <chr>    <chr>                         <chr>     <chr>       <date>     <dbl>
 1 6291016  >>> Melbourne - North East ;… A8459955… Original    2024-01-01   4.2
 2 6291016  >>> Melbourne - Inner ;  Une… A8459965… Original    2024-01-01   4.5
 3 6291016  >>> Ballarat ;  Unemployment… A8459966… Original    2024-01-01   7.6
 4 6291016  >>> Geelong ;  Unemployment … A8459967… Original    2024-01-01   3  
 5 6291016  >>> Hume ;  Unemployment rat… A8459967… Original    2024-01-01   2.8
 6 6291016  >>> Latrobe - Gippsland ;  U… A8459968… Original    2024-01-01   4.4
 7 6291016  >>> Melbourne - Outer East ;… A8459985… Original    2024-01-01   3.8
 8 6291016  >>> Melbourne - South East ;… A8459992… Original    2024-01-01   4.5
 9 6291016  >>> Victoria - North West ; … A8459992… Original    2024-01-01   2.6
10 6291016  >>> Melbourne - Inner East ;… A8460001… Original    2024-01-01   3.4

Get spatial data

Use sf package to process spatial data

library(sf)

# Read the 2021 SA4 boundaries and keep only the Victorian regions, retaining
# just the region name (the geometry column stays attached to the sf object).
# NOTE(review): codes "297" and "299" are presumably the special-purpose SA4s
# with no mappable area -- confirm against the ASGS code list.
sa4 <- sf::read_sf("data/SA4_2021_AUST_GDA2020.shp") |>
  dplyr::filter(
    STE_NAME21 == "Victoria",
    !(SA4_CODE21 %in% c("297", "299"))
  ) |>
  dplyr::select(SA4_NAME21)

# Attach the unemployment figures to the boundaries. The key is spelled out
# (SA4_NAME21 is the only attribute column left on `sa4`) so the join does not
# rely on implicit column matching, and left_join is namespaced like the other
# dplyr calls in this block. Regions without a match keep NA values.
regional_employment <- dplyr::left_join(sa4, regional_emp, by = "SA4_NAME21")

ggplot2

ggplot2 was first developed in 2007. Based on the “grammar of graphics”, it allows you to build a chart by iteratively adding components.

ggplot2 line chart

# Line chart of the unemployment rate over time, one coloured line per
# series type.
gg_emp <- ggplot(
  data = emp,
  mapping = aes(x = date, y = value, colour = series_type)
) +
  geom_line() +
  scale_colour_manual(values = c("#595959", "#F8766D")) +
  labs(
    x = "",
    y = "unemployment rate",
    title = "Victorian unemployment rate",
    # The subtitle names the earliest month in the data ("<Month> <Year>").
    subtitle = paste0("Monthly data since ", format(min(emp$date), "%B %Y"))
  )

ggplot map

# Choropleth of the regional unemployment rate: each region's polygon is
# filled by `value` on a light-to-dark blue gradient.
gg_map <- regional_employment |>
  ggplot(aes(fill = value)) +
  geom_sf() +
  scale_fill_gradient(
    low = "#E2E8F4",
    high = "#02359A"
  ) +
  labs(
    title = "Victorian regional unemployment rate",
    # na.rm = TRUE: regional_employment is the result of a left join, so a
    # region without a matching record has an NA date. Without it the
    # subtitle would read "Regional unemployment rates for NA".
    subtitle = paste0(
      "Regional unemployment rates for ",
      format(max(regional_employment$date, na.rm = TRUE), "%B %Y")
    )
  )

ggplot map

ggplot style

# Theme overrides for a dark look: grey plot and panel backgrounds,
# near-white text, and no grid lines.
gg_style <- local({
  # The two colours used throughout the theme, named once.
  dark_grey <- "#505050"
  off_white <- "#fefefe"

  theme(
    # Plot-level elements
    plot.background = element_rect(fill = dark_grey, color = dark_grey),
    plot.title = element_text(color = off_white),
    plot.subtitle = element_text(color = off_white),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
    # Panel
    panel.background = element_rect(fill = dark_grey, color = dark_grey),
    panel.grid = element_blank(),
    # Legend
    legend.background = element_rect(fill = "transparent", color = dark_grey),
    legend.box.background = element_rect(fill = "transparent"),
    legend.text = element_text(size = rel(0.8), color = off_white),
    # NOTE(review): passing NULL looks like a no-op, and this argument is
    # deprecated in recent ggplot2 releases -- confirm it can be removed.
    legend.text.align = NULL,
    legend.title = element_text(hjust = 0, color = off_white)
  )
})

ggplot styled map

highcharter

The highcharter package is a wrapper for the Highcharts JavaScript library

highcharter line chart

# Interactive line chart of the unemployment rate, one series per
# series type.
hc_emp <- hchart(
  emp,
  type = "line",
  mapping = hcaes(x = date, y = value, group = series_type)
) |>
  # Axis titles: label the y axis only.
  hc_yAxis(title = list(text = "unemployment rate")) |>
  hc_xAxis(title = list(text = NULL)) |>
  hc_title(text = "Victorian unemployment rate") |>
  hc_subtitle(
    # The subtitle names the earliest month in the data ("<Month> <Year>").
    text = paste0("Monthly data since ", format(min(emp$date), "%B %Y"))
  )


highcharter maps

# Map of Australia built from the "countries/au/au-all" map definition,
# rendered at 500 x 500 pixels.
aus_map <- hc_size(
  hcmap(
    map = "countries/au/au-all",
    download_map_data = TRUE,
    borderWidth = 0.5
  ),
  width = 500,
  height = 500
)

highcharter map data

library(geojsonsf)

# Simplify the region geometries, then serialise the sf object to GeoJSON
# for use as Highcharts map data.
# NOTE(review): the unit of dTolerance depends on the CRS and sf's s2
# setting -- confirm that 10 gives the intended level of detail.
regional_json <- geojsonsf::sf_geojson(
  sf::st_simplify(regional_employment, dTolerance = 10)
)

highcharter map

# Interactive choropleth of the regional unemployment rate. Region shapes
# come from `regional_json`; values are matched to shapes on SA4_NAME21.
hc_map <- highchart(type = "map") |>
  hc_add_series(
    name = "Unemployment",
    mapData = regional_json,
    # select() on an sf object keeps the geometry column, so drop it first.
    # The polygons are already supplied through mapData; leaving geometry in
    # would pass a second copy of every polygon along with the data points.
    data = regional_employment |>
      sf::st_drop_geometry() |>
      select(SA4_NAME21, value),
    value = "value",
    joinBy = "SA4_NAME21",
    borderWidth = 0.5
  ) |>
  hc_tooltip(pointFormat = "{point.SA4_NAME21}: {point.value}%") |>
  # Colour axis spans the observed range, rounded out to whole percentages.
  # na.rm = TRUE: regions left unmatched by the upstream left join have NA
  # values, which would otherwise turn both limits into NA.
  hc_colorAxis(
    min = min(regional_employment$value, na.rm = TRUE) |> floor(),
    minColor = "#E2E8F4",
    max = max(regional_employment$value, na.rm = TRUE) |> ceiling(),
    maxColor = "#02359A"
  ) |>
  hc_title(text = "Victorian regional unemployment rate") |>
  hc_subtitle(
    # na.rm = TRUE for the same reason: avoid a subtitle ending in "NA".
    text = paste0(
      "Regional unemployment rates for ",
      format(max(regional_employment$date, na.rm = TRUE), "%B %Y")
    )
  )

highcharter map theme


# Highcharts theme for the map: transparent chart background with
# near-white title, subtitle, and colour-axis labels.
hc_map_theme <- local({
  # Text colour shared by every labelled element of the theme.
  off_white <- "#fefefe"

  highcharter::hc_theme(
    chart = list(backgroundColor = "transparent"),
    title = list(
      style = list(
        color = off_white,
        `font-weight` = "bold",
        `font-size` = "1.5rem",
        `background-color` = "transparent"
      )
    ),
    subtitle = list(
      style = list(color = off_white)
    ),
    colorAxis = list(
      labels = list(
        style = list(color = off_white)
      )
    )
  )
})

highcharter map

This presentation

source code

slides

R resources

https://ggplot2.tidyverse.org/index.html

https://jkunst.com/highcharter/

https://r-graphics.org/

https://ggplot2-book.org/

DJSIR

Victoria’s Economic Overview Portal

Victorian Labour Force

Victoria’s global trade

Victorian food and fibre exports

Victoria’s creative economy

Tourism in Victoria